#!/usr/bin/python

import sys
import re
from Bio import Entrez
from Bio import SeqIO

# Report the phylum of the top BLAST hit listed in a tab-separated BLAST table.
#
# Usage: <script> <blast_table>
#
# Only the first line of the table is examined.  If the table is empty, the
# part of the file name before the first '.' is printed followed by
# "no BLAST hit".  Otherwise the hit accession on that line is fetched from
# NCBI as a GenBank record and "<locus_tag>\t<phylum>" is printed.

# NCBI asks E-utilities clients to identify themselves with a contact address.
Entrez.email = "jimmysaw@gmail.com"

# Captures the text between the first '|' and the last '|' of the hit
# identifier (\S+ is greedy), after a leading word such as a database tag.
hit_acc = re.compile(r'\w+\|(\S+)\|')

# 0-based column indexes in the tab-separated table.  The meaning of each
# column comes from the upstream table format, which is not visible here --
# TODO confirm against the program that writes the table.
LOCUS_TAG_COL = 5
ACCESSION_COL = 9

# Position in the GenBank lineage list that this script reports as the phylum.
# NOTE(review): lineage depth varies between organisms, so this index is
# assumed (not guaranteed) to be the phylum rank.
PHYLUM_RANK = 2

if len(sys.argv) < 2:
    sys.exit("usage: %s <blast_table>" % sys.argv[0])

blastfile = sys.argv[1]
bfn = blastfile.split('.')

# Only the first line is needed, so read just that one; the 'with' block
# guarantees the file is closed even if a later step fails.
# "rU" is the Python 2 universal-newlines mode this script was written for
# (the 'U' flag was removed in Python 3.11).
with open(blastfile, "rU") as bf:
    line = bf.readline()

if not line:
    print(bfn[0].strip() + "\t" + "no BLAST hit")
else:
    s = line.split('\t')
    if len(s) <= ACCESSION_COL:
        sys.exit("%s: expected at least %d tab-separated columns, found %d"
                 % (blastfile, ACCESSION_COL + 1, len(s)))
    locus_tag = s[LOCUS_TAG_COL]
    accession = s[ACCESSION_COL]

    pattern = hit_acc.match(accession)
    if pattern is None:
        sys.exit("%s: cannot parse hit accession %r" % (blastfile, accession))
    acc = pattern.group(1)

    # NOTE(review): if the input is blastp output, the hits are protein
    # accessions -- confirm that db="nucleotide" (rather than "protein") is
    # what is intended here.
    handle = Entrez.efetch(db="nucleotide", id=acc, rettype="gb")
    try:
        rec = SeqIO.read(handle, "genbank")
    finally:
        # Release the network handle whether or not parsing succeeded.
        handle.close()

    lineage = rec.annotations.get("taxonomy", [])
    if len(lineage) <= PHYLUM_RANK:
        sys.exit("%s: taxonomy for %s has only %d level(s): %r"
                 % (blastfile, acc, len(lineage), lineage))
    phylum = lineage[PHYLUM_RANK]
    print(locus_tag + "\t" + phylum)
